Code
library(zoib)
library(tidyverse)
library(GGally)
library(ggpubr)
library(kableExtra)
theme_set(theme_pubr(legend = "bottom"))
# read in raw data
nnns0 <- read.csv("../NNNS_score_data.csv")library(zoib)
library(tidyverse)
library(GGally)
library(ggpubr)
library(kableExtra)
theme_set(theme_pubr(legend = "bottom"))
# read in raw data
nnns0 <- read.csv("../NNNS_score_data.csv")# clean up variable names
# remove the dots at the end of variable names
colnames(nnns0) <- gsub("\\.+$", "", colnames(nnns0))
# replace all the dots in variable names with underscores
colnames(nnns0) <- gsub("\\.+", "_", colnames(nnns0))
nnns <- nnns0 |>
filter(
# exclude neurologic or airway anomaly
Neurologic_Complication == 0, AirwayAnomalyYN == 0,
# include infants from birth to 4 weeks old
# there are two outliers with age at surgery > 30 days
Age_at_Surgery_days <= 30
) |>
# some binary variables have values of 1/2 or Y/N, recode them to 0/1
mutate(
Female = as.integer(sex_1_M_2_F == 2),
Premature = as.integer(Premature == 1),
Extubation_failure = as.integer(Extubation_failure == "Y"),
) |>
# relabel cardiac anatomy
mutate(
Cardiac_Anatomy = factor(
Cardiac_Anatomy, levels = 1:4,
labels = c(
"Single ventricle w/o arch obstruction",
"Single ventricle w/ arch obstruction",
"Two ventricle w/o arch obstruction",
"Two ventricle w/ arch obstruction"
)
),
# for model building purposes, combine the 2 levels w/o arch obstruction
Cardiac_Anatomy_collapsed = fct_collapse(
Cardiac_Anatomy,
"W/o arch obstruction" = c("Single ventricle w/o arch obstruction", "Two ventricle w/o arch obstruction"),
"Single ventricle w/ arch obstruction" = "Single ventricle w/ arch obstruction",
"Two ventricle w/ arch obstruction" = "Two ventricle w/ arch obstruction"
)
) |>
# convert date variables to date class
mutate_at(
vars("Date_PO_feeds_started", "Date_Reaching_Full_PO", "Date_Identified_as_not_yet_full_PO"),
as_date, format = "%m/%d/%Y"
)
# drop unnecessary variables
nnns <- nnns |> select(!c(
"sex_1_M_2_F", # use Female instead
"Intubated_Pre_operatively", "bypass_used", "bypass_time_min", # not of interest
"Neurologic_Complication", "AirwayAnomalyYN" # already excluded
)) \[ f(y_i|\eta_i) = \begin{cases} p_i, & y_i =0 \\ (1-p_i)q_i, & y_i =1 \\ (1 − p_i)(1 − q_i)Beta(\alpha_{i_1}, \alpha_{i_2}) &y_i \in (0, 1) \end{cases} \]
\[ \begin{aligned} logit\left(\mu^{(0,1)} = \frac{\alpha_1}{\alpha_1+ \alpha_2}\right) = \mathbf x_1 \boldsymbol \beta_1 + I_1(\mathbf z_{1,i}\gamma_{1,i}) \\ log\left(v_i = \alpha_1+ \alpha_2\right) = \mathbf x_2 \boldsymbol \beta_2 + I_2(\mathbf z_{2,i}\gamma_{2,i}) \\ logit\left(p_i\right) = \mathbf x_3 \boldsymbol \beta_3 + I_1(\mathbf z_{3,i}\gamma_{3,i}) \\ logit\left(q_i\right) = \mathbf x_4 \boldsymbol \beta_4 + I_1(\mathbf z_{4,i}\gamma_{4 ,i}) \end{aligned} \]
set.seed(11282023) #Set seed, bayesian model!
tictoc::tic()
model1 =zoib(Percent_of_feeds_taken_by_mouth_at_discharge #Response
~Pre_Op_NNNS_attention_score+
Post_Op_NNNS_attention_score+
Female+Genetic_Syndrome_or_Chromosomal_Abnormality+
Age_at_Surgery_days+Cardiac_Anatomy+
Length_of_intubation_days+Length_of_Stay_days+
Extubation_failure+GI_Complication+Premature
#x1 design matrix
| 1 #x2 design matrix- estimating variance
|Pre_Op_NNNS_attention_score+
Post_Op_NNNS_attention_score+
Female+Genetic_Syndrome_or_Chromosomal_Abnormality+
Age_at_Surgery_days+Cardiac_Anatomy+
Length_of_intubation_days+Length_of_Stay_days+
Extubation_failure+GI_Complication+Premature #x3 design matrix
|Pre_Op_NNNS_attention_score+
Post_Op_NNNS_attention_score+
Female+Genetic_Syndrome_or_Chromosomal_Abnormality+
Age_at_Surgery_days+Cardiac_Anatomy+
Length_of_intubation_days+Length_of_Stay_days+
Extubation_failure+GI_Complication+Premature, #x4 design matrix
data = nnns,
n.response = 1,
zero.inflation = TRUE,
one.inflation = TRUE,
link.mu = "logit",
link.x0 = "logit",
link.x1 = "logit",
random = 0,
n.chain = 4, # number of MCMC chains
n.iter = 10000, #number of iterations before burn/thin
n.thin = 10, # thinning period
n.burn = 5000, # burn-in period
seeds = c(11,29,20,23) #vector of seeds for chains to make reproducable
)
tictoc::toc()
saveRDS(model1, file="model1.RData")b: vector of estimates from Eqn 1; that is, g(mu) = xb*b +z*gamma
d: vector of estimates from Eqn 2; that is, log(eta) = xd*d+z*gamma
b0: vector of estimates from Eqn 3; that is, g(p0) = x0*b0 +z*gamma
b1: vector of estimates from Eqn ;4 that is, g(p1) = x1*b1+z*gamma
model1 =readRDS(file="model1.RData")
model1$coeff |> summary()
Iterations = 1:1000
Thinning interval = 1
Number of chains = 4
Sample size per chain = 1000
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
b[1] -8.09465 3.61077 0.057091 0.086664
b[2] 0.16754 0.30532 0.004828 0.006510
b[3] 1.41611 0.54994 0.008695 0.012870
b[4] -0.77692 0.77662 0.012279 0.017591
b[5] 0.56905 1.07698 0.017029 0.028173
b[6] -0.04381 0.12693 0.002007 0.004144
b[7] 1.27294 1.10613 0.017489 0.024815
b[8] -1.49771 0.75022 0.011862 0.016137
b[9] -1.36771 0.91489 0.014466 0.019695
b[10] 0.12185 0.24224 0.003830 0.006870
b[11] -0.01628 0.07887 0.001247 0.002793
b[12] -0.94204 1.89595 0.029978 0.056783
b[13] 0.36259 1.46884 0.023224 0.035131
b[14] 1.20931 1.23171 0.019475 0.026968
b0[1] -26.42023 19.59217 0.309779 3.034188
b0[2] -1.90012 1.33031 0.021034 0.033666
b0[3] -0.17756 1.26060 0.019932 0.025094
b0[4] -1.31170 1.79427 0.028370 0.033291
b0[5] -0.56441 2.46492 0.038974 0.050618
b0[6] -1.66999 0.68567 0.010841 0.022948
b0[7] 34.21280 17.36400 0.274549 3.293759
b0[8] 26.79215 17.24595 0.272682 3.010231
b0[9] 33.87990 17.29435 0.273448 3.139413
b0[10] 1.02017 0.67087 0.010607 0.015519
b0[11] 0.36265 0.16344 0.002584 0.005151
b0[12] -11.40039 5.46852 0.086465 0.168737
b0[13] -6.40822 4.21461 0.066639 0.105051
b0[14] -1.27472 3.02047 0.047758 0.056843
b1[1] -237.08487 121.44880 1.920274 8.186235
b1[2] 10.25191 13.30420 0.210358 0.890090
b1[3] 20.56138 23.44149 0.370643 1.755172
b1[4] 33.35014 29.12542 0.460513 2.346221
b1[5] -64.26438 43.41481 0.686448 3.460382
b1[6] 5.79882 3.94782 0.062421 0.270918
b1[7] 25.88272 41.17875 0.651093 3.887651
b1[8] -25.73807 46.95655 0.742448 4.039620
b1[9] 49.72144 32.03913 0.506583 2.605644
b1[10] 2.79552 9.38518 0.148393 0.820767
b1[11] -0.62349 1.50991 0.023874 0.127070
b1[12] -12.46243 67.82066 1.072339 5.711628
b1[13] -66.75614 66.37378 1.049462 5.622294
b1[14] -16.30196 68.66405 1.085674 6.924297
d 1.69038 0.42303 0.006689 0.008758
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
b[1] -14.8474 -10.48537 -8.11701 -5.76482 -0.71873
b[2] -0.4539 -0.03206 0.17530 0.37322 0.73392
b[3] 0.3054 1.06533 1.43189 1.78935 2.49005
b[4] -2.2652 -1.30988 -0.78185 -0.26866 0.74620
b[5] -1.5229 -0.13971 0.57724 1.28133 2.65090
b[6] -0.2974 -0.12447 -0.04508 0.03760 0.20562
b[7] -0.8719 0.54888 1.24323 1.96193 3.54529
b[8] -2.8418 -2.01442 -1.53797 -1.02424 0.06508
b[9] -3.0918 -2.00869 -1.40385 -0.76150 0.46411
b[10] -0.3694 -0.03303 0.12149 0.27391 0.59904
b[11] -0.1849 -0.06551 -0.01220 0.03614 0.12824
b[12] -4.4815 -2.17242 -1.04193 0.17430 3.12491
b[13] -2.4819 -0.62871 0.36894 1.34162 3.31607
b[14] -1.1930 0.39759 1.18757 2.01486 3.66590
b0[1] -66.6996 -40.42623 -24.71704 -11.53378 6.47311
b0[2] -4.7803 -2.73308 -1.81381 -1.00625 0.47801
b0[3] -2.7282 -0.98068 -0.19594 0.64975 2.38081
b0[4] -5.0127 -2.44438 -1.30167 -0.13431 2.20775
b0[5] -5.7475 -2.17587 -0.48875 1.11533 4.00194
b0[6] -3.1933 -2.07548 -1.59464 -1.17127 -0.51880
b0[7] 7.3892 19.53460 31.89964 47.67642 68.90797
b0[8] 0.5156 12.51126 24.39181 40.07407 61.85394
b0[9] 7.3885 19.65624 31.50292 47.13116 68.88510
b0[10] -0.1272 0.54728 0.97938 1.44124 2.48895
b0[11] 0.0770 0.24637 0.35128 0.46016 0.72186
b0[12] -23.2582 -14.76718 -10.99724 -7.55044 -1.96659
b0[13] -15.2413 -9.04412 -6.08288 -3.46518 1.15036
b0[14] -7.7286 -3.12670 -1.10839 0.76417 4.10107
b1[1] -500.0818 -312.38491 -230.56351 -152.76696 -22.78017
b1[2] -14.6438 1.70753 9.67538 18.62647 38.78106
b1[3] -24.8474 4.35148 19.84623 36.96641 66.35291
b1[4] -18.4475 12.97897 31.41279 51.19150 94.14057
b1[5] -160.4817 -91.65415 -60.79211 -33.70751 10.31647
b1[6] -1.3629 3.07907 5.60600 8.39559 13.98074
b1[7] -52.0082 -2.25615 24.29563 54.02553 106.77353
b1[8] -119.0373 -57.07532 -24.94217 5.00622 65.21001
b1[9] -7.5283 27.68274 47.62888 69.52931 118.46872
b1[10] -16.6639 -3.34039 2.90113 8.92324 21.56982
b1[11] -3.5802 -1.58695 -0.62721 0.37284 2.37157
b1[12] -157.2316 -54.54321 -6.41170 34.65294 106.95123
b1[13] -206.2028 -108.41730 -62.75804 -21.79855 55.90084
b1[14] -161.1771 -59.30652 -18.53089 27.97054 123.19694
d 0.8313 1.41093 1.70051 1.97617 2.48427
traceplot(model1$coeff)autocorr.plot(model1$coeff)gelman.diag(model1$coeff)Potential scale reduction factors:
Point est. Upper C.I.
b[1] 1.00 1.00
b[2] 1.00 1.00
b[3] 1.00 1.00
b[4] 1.00 1.00
b[5] 1.00 1.01
b[6] 1.00 1.01
b[7] 1.00 1.01
b[8] 1.00 1.01
b[9] 1.00 1.01
b[10] 1.00 1.01
b[11] 1.01 1.02
b[12] 1.01 1.02
b[13] 1.00 1.01
b[14] 1.00 1.00
b0[1] 1.14 1.39
b0[2] 1.00 1.01
b0[3] 1.00 1.01
b0[4] 1.00 1.00
b0[5] 1.00 1.00
b0[6] 1.00 1.00
b0[7] 1.22 1.60
b0[8] 1.22 1.60
b0[9] 1.23 1.62
b0[10] 1.00 1.01
b0[11] 1.00 1.00
b0[12] 1.00 1.00
b0[13] 1.00 1.00
b0[14] 1.00 1.01
b1[1] 1.01 1.01
b1[2] 1.01 1.01
b1[3] 1.01 1.01
b1[4] 1.02 1.07
b1[5] 1.03 1.09
b1[6] 1.04 1.11
b1[7] 1.05 1.12
b1[8] 1.08 1.22
b1[9] 1.02 1.04
b1[10] 1.02 1.07
b1[11] 1.03 1.06
b1[12] 1.04 1.11
b1[13] 1.06 1.18
b1[14] 1.01 1.03
d 1.00 1.00
Multivariate psrf
1.24